{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1958cc83",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ce2356d7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting requests-html\n",
      "  Downloading requests_html-0.10.0-py3-none-any.whl (13 kB)\n",
      "Requirement already satisfied: requests in c:\\users\\asus\\anaconda3\\lib\\site-packages (from requests-html) (2.31.0)\n",
      "Collecting pyquery (from requests-html)\n",
      "  Downloading pyquery-2.0.0-py3-none-any.whl (22 kB)\n",
      "Collecting fake-useragent (from requests-html)\n",
      "  Obtaining dependency information for fake-useragent from https://files.pythonhosted.org/packages/d0/4b/1479877468e20c9a5719336a52bb0ee6f822d387b67f05a0c81e098064cc/fake_useragent-1.2.1-py3-none-any.whl.metadata\n",
      "  Downloading fake_useragent-1.2.1-py3-none-any.whl.metadata (11 kB)\n",
      "Collecting parse (from requests-html)\n",
      "  Obtaining dependency information for parse from https://files.pythonhosted.org/packages/9c/57/6c51ccd70de3ebcfb0bb5b0eea2ac0ab13c51ab55043a7243faef9eb58ef/parse-1.19.1-py2.py3-none-any.whl.metadata\n",
      "  Downloading parse-1.19.1-py2.py3-none-any.whl.metadata (20 kB)\n",
      "Collecting bs4 (from requests-html)\n",
      "  Downloading bs4-0.0.1.tar.gz (1.1 kB)\n",
      "  Preparing metadata (setup.py): started\n",
      "  Preparing metadata (setup.py): finished with status 'done'\n",
      "Requirement already satisfied: w3lib in c:\\users\\asus\\anaconda3\\lib\\site-packages (from requests-html) (1.21.0)\n",
      "Collecting pyppeteer>=0.0.14 (from requests-html)\n",
      "  Downloading pyppeteer-1.0.2-py3-none-any.whl (83 kB)\n",
      "     ---------------------------------------- 0.0/83.4 kB ? eta -:--:--\n",
      "     -------------- ------------------------- 30.7/83.4 kB ? eta -:--:--\n",
      "     ---------------------------- --------- 61.4/83.4 kB 812.7 kB/s eta 0:00:01\n",
      "     -------------------------------------- 83.4/83.4 kB 937.7 kB/s eta 0:00:00\n",
      "Requirement already satisfied: appdirs<2.0.0,>=1.4.3 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (1.4.4)\n",
      "Requirement already satisfied: certifi>=2021 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (2023.7.22)\n",
      "Requirement already satisfied: importlib-metadata>=1.4 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (6.0.0)\n",
      "Collecting pyee<9.0.0,>=8.1.0 (from pyppeteer>=0.0.14->requests-html)\n",
      "  Downloading pyee-8.2.2-py2.py3-none-any.whl (12 kB)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.42.1 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (4.65.0)\n",
      "Requirement already satisfied: urllib3<2.0.0,>=1.25.8 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from pyppeteer>=0.0.14->requests-html) (1.26.16)\n",
      "Collecting websockets<11.0,>=10.0 (from pyppeteer>=0.0.14->requests-html)\n",
      "  Downloading websockets-10.4-cp311-cp311-win_amd64.whl (101 kB)\n",
      "     ---------------------------------------- 0.0/101.4 kB ? eta -:--:--\n",
      "     -------------------------------------- 101.4/101.4 kB 2.9 MB/s eta 0:00:00\n",
      "Requirement already satisfied: beautifulsoup4 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from bs4->requests-html) (4.12.2)\n",
      "Requirement already satisfied: lxml>=2.1 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from pyquery->requests-html) (4.9.2)\n",
      "Collecting cssselect>=1.2.0 (from pyquery->requests-html)\n",
      "  Downloading cssselect-1.2.0-py2.py3-none-any.whl (18 kB)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from requests->requests-html) (2.0.4)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from requests->requests-html) (3.4)\n",
      "Requirement already satisfied: six>=1.4.1 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from w3lib->requests-html) (1.16.0)\n",
      "Requirement already satisfied: zipp>=0.5 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from importlib-metadata>=1.4->pyppeteer>=0.0.14->requests-html) (3.11.0)\n",
      "Requirement already satisfied: colorama in c:\\users\\asus\\anaconda3\\lib\\site-packages (from tqdm<5.0.0,>=4.42.1->pyppeteer>=0.0.14->requests-html) (0.4.6)\n",
      "Requirement already satisfied: soupsieve>1.2 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from beautifulsoup4->bs4->requests-html) (2.4)\n",
      "Downloading fake_useragent-1.2.1-py3-none-any.whl (14 kB)\n",
      "Downloading parse-1.19.1-py2.py3-none-any.whl (18 kB)\n",
      "Building wheels for collected packages: bs4\n",
      "  Building wheel for bs4 (setup.py): started\n",
      "  Building wheel for bs4 (setup.py): finished with status 'done'\n",
      "  Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1264 sha256=62d274787d2fdd1246845a8073a659526e41a92804c649c8e6948d997a6ea2bb\n",
      "  Stored in directory: c:\\users\\asus\\appdata\\local\\pip\\cache\\wheels\\d4\\c8\\5b\\b5be9c20e5e4503d04a6eac8a3cd5c2393505c29f02bea0960\n",
      "Successfully built bs4\n",
      "Installing collected packages: pyee, parse, fake-useragent, websockets, cssselect, pyquery, pyppeteer, bs4, requests-html\n",
      "  Attempting uninstall: cssselect\n",
      "    Found existing installation: cssselect 1.1.0\n",
      "    Uninstalling cssselect-1.1.0:\n",
      "      Successfully uninstalled cssselect-1.1.0\n",
      "Successfully installed bs4-0.0.1 cssselect-1.2.0 fake-useragent-1.2.1 parse-1.19.1 pyee-8.2.2 pyppeteer-1.0.2 pyquery-2.0.0 requests-html-0.10.0 websockets-10.4\n"
     ]
    }
   ],
   "source": [
    "# Install into the running kernel's environment; the version is pinned so a re-run gets the same package.\n",
    "%pip install -q requests-html==0.10.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "352c14ba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'3bc00a4927fc4398a5bd552a7fb2ae0e.htm', 'df632618d31b40798bf71b8ea2d103ca.htm', '../zjnf/tsnf/index.htm', '../xxgk/xxxl/index.htm', '2ca11309128e42a58ed4f1c42be754d3.htm', 'http://gj.nfu.edu.cn/Home/Waishi/waishilist/class/1/p/1.html', 'e37b88d9a4fd49b89a59af13f09246db.htm', 'http://jx.nfu.edu.cn/', '../zjnf/jtzy/index.htm', 'f2bf2742d7de4ce09be7082fef186869.htm', 'index4.htm', 'index.htm', '53dd98b1eb6c4d7f8def44592e519c45.htm', '../rcpy/msjs/index.htm', '9d85c517440d4128a0e1a7bc1a4a9f1b.htm', '96267573495b4f66a490f2ba9ec3ea0c.htm', '../tzgg/index.htm', '../zjnf/shfw/index.htm', '023ce37514814a628c45433a3924628b.htm', 'https://www.cnki.net/', 'http://www.moe.gov.cn/', 'index121.htm', '../xxgk/xxjj/index.htm', '../xxgk/nfdsj/index.htm', 'http://www.sysu.edu.cn/2012/cn/index.htm', '../zjnf/index.htm', '../xxgk/index.htm', '92c408c6529d4d26a723d841d6485a1e.htm', '../index.htm', '4f75495fe0e94d378fbabb94fabba1bd.htm', '../jgsz/index.htm', 'de6768f996eb44f4ad0c9bb169903e98.htm', '../zggcddsxxjy/index.htm', '../jxky/index.htm', 'http://cpc.nfu.edu.cn/', '../ztb/index.htm', '../xxgk/xrld/index.htm', 'http://lib.nfu.edu.cn/', '0803663924a442588ee67bce4b9249be.htm', 'http://www.gdmbjy.cn/', '../gjdt/index.htm', '../rcpy/jxjy/index.htm', 'http://jw.nfu.edu.cn/', 'd2163668db974ea596874c63d1bb7776.htm', '../jgsz/gljg/index.htm', '../rcpy/bkjy/index.htm', 'https://www.gpowersoft.com/', '../xcyx/index.htm', '../zsjy/jyfw/index.htm', '64d88d805cb24e29818796b5ed03bd84.htm', '../rczp/glxl/index.htm', 'a0b30b72b462428cb239e90fe175a9ce.htm', '../tsg/index.htm', '../hzjl/index.htm', '../zsjy/index.htm', '../rczp/jsxl/index.htm', 'http://gj.nfu.edu.cn/', '../jxky/kyjg/index.htm', 'http://das.nfu.edu.cn/', 'http://ky.nfu.edu.cn/', 'http://www.gz.gov.cn/', 'fb7ac3185a354c3db858d063d2d46239.htm', 'http://zsb.nfu.edu.cn/', 'c6c2ea21061e41eda1f5f27a8baf6cc2.htm', '../zjnf/ylfw/index.htm', '../dshyx/index.htm', '../xxgk/xhxxxg/index.htm', '../jgsz/cswyh/index.htm', 
'http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=44011702000081', '../jgsz/yxsz/index.htm', 'd6f0a1cedeac423fb6eda482d72c56ce.htm', '../rcpy/index.htm', '../xydt/index.htm', 'index1.htm', 'http://www.gdpr.com/', '../rczp/index.htm', '../qzzggcdjd100zn/index.htm', '7148917faf2f437abeaf225e966dd502.htm', 'https://beian.miit.gov.cn/', 'http://edu.gd.gov.cn/', 'index3.htm', 'http://en.nfu.edu.cn/', '../zjnf/xb/index.htm', 'index2.htm'}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "\n",
    "# Fetch the page and stop here on an HTTP error instead of parsing an error page.\n",
    "r = session.get('https://www.nfu.edu.cn/xxyw/index.htm')\n",
    "r.raise_for_status()\n",
    "\n",
    "# Every link on the page exactly as written in the HTML (relative hrefs stay relative).\n",
    "# `links` is a set, so it is sorted before printing to give a stable, one-per-line listing.\n",
    "all_links = r.html.links\n",
    "print(*sorted(all_links), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3b2c85b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'https://www.nfu.edu.cn/xydt/index.htm', 'https://www.nfu.edu.cn/zjnf/index.htm', 'https://www.nfu.edu.cn/xxyw/index1.htm', 'http://gj.nfu.edu.cn/Home/Waishi/waishilist/class/1/p/1.html', 'https://www.nfu.edu.cn/xxyw/96267573495b4f66a490f2ba9ec3ea0c.htm', 'https://www.nfu.edu.cn/zjnf/jtzy/index.htm', 'http://jx.nfu.edu.cn/', 'https://www.nfu.edu.cn/zjnf/ylfw/index.htm', 'https://www.nfu.edu.cn/rcpy/msjs/index.htm', 'https://www.nfu.edu.cn/xxyw/df632618d31b40798bf71b8ea2d103ca.htm', 'https://www.nfu.edu.cn/xxyw/index.htm', 'https://www.nfu.edu.cn/xxyw/index121.htm', 'https://www.nfu.edu.cn/xxgk/index.htm', 'https://www.cnki.net/', 'https://www.nfu.edu.cn/rczp/glxl/index.htm', 'http://www.moe.gov.cn/', 'https://www.nfu.edu.cn/xxyw/f2bf2742d7de4ce09be7082fef186869.htm', 'https://www.nfu.edu.cn/jgsz/cswyh/index.htm', 'https://www.nfu.edu.cn/zjnf/shfw/index.htm', 'https://www.nfu.edu.cn/zggcddsxxjy/index.htm', 'http://www.sysu.edu.cn/2012/cn/index.htm', 'https://www.nfu.edu.cn/rcpy/index.htm', 'https://www.nfu.edu.cn/rcpy/bkjy/index.htm', 'https://www.nfu.edu.cn/xxyw/a0b30b72b462428cb239e90fe175a9ce.htm', 'https://www.nfu.edu.cn/xxgk/xhxxxg/index.htm', 'https://www.nfu.edu.cn/zsjy/jyfw/index.htm', 'https://www.nfu.edu.cn/jxky/kyjg/index.htm', 'http://cpc.nfu.edu.cn/', 'https://www.nfu.edu.cn/xxyw/index3.htm', 'https://www.nfu.edu.cn/xxyw/4f75495fe0e94d378fbabb94fabba1bd.htm', 'https://www.nfu.edu.cn/xxyw/3bc00a4927fc4398a5bd552a7fb2ae0e.htm', 'http://lib.nfu.edu.cn/', 'http://www.gdmbjy.cn/', 'http://jw.nfu.edu.cn/', 'https://www.nfu.edu.cn/jxky/index.htm', 'https://www.nfu.edu.cn/jgsz/gljg/index.htm', 'https://www.nfu.edu.cn/xxyw/9d85c517440d4128a0e1a7bc1a4a9f1b.htm', 'https://www.gpowersoft.com/', 'https://www.nfu.edu.cn/xcyx/index.htm', 'https://www.nfu.edu.cn/xxyw/64d88d805cb24e29818796b5ed03bd84.htm', 'https://www.nfu.edu.cn/gjdt/index.htm', 'https://www.nfu.edu.cn/xxyw/d2163668db974ea596874c63d1bb7776.htm', 'https://www.nfu.edu.cn/rczp/jsxl/index.htm', 
'https://www.nfu.edu.cn/xxyw/92c408c6529d4d26a723d841d6485a1e.htm', 'https://www.nfu.edu.cn/xxgk/xxxl/index.htm', 'https://www.nfu.edu.cn/xxyw/7148917faf2f437abeaf225e966dd502.htm', 'http://gj.nfu.edu.cn/', 'https://www.nfu.edu.cn/zjnf/xb/index.htm', 'https://www.nfu.edu.cn/xxyw/c6c2ea21061e41eda1f5f27a8baf6cc2.htm', 'http://das.nfu.edu.cn/', 'https://www.nfu.edu.cn/xxyw/2ca11309128e42a58ed4f1c42be754d3.htm', 'https://www.nfu.edu.cn/xxyw/index2.htm', 'https://www.nfu.edu.cn/xxyw/de6768f996eb44f4ad0c9bb169903e98.htm', 'https://www.nfu.edu.cn/ztb/index.htm', 'https://www.nfu.edu.cn/index.htm', 'http://ky.nfu.edu.cn/', 'http://www.gz.gov.cn/', 'https://www.nfu.edu.cn/xxyw/0803663924a442588ee67bce4b9249be.htm', 'https://www.nfu.edu.cn/hzjl/index.htm', 'https://www.nfu.edu.cn/xxyw/d6f0a1cedeac423fb6eda482d72c56ce.htm', 'http://zsb.nfu.edu.cn/', 'https://www.nfu.edu.cn/zsjy/index.htm', 'https://www.nfu.edu.cn/xxyw/023ce37514814a628c45433a3924628b.htm', 'https://www.nfu.edu.cn/tsg/index.htm', 'http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=44011702000081', 'https://www.nfu.edu.cn/xxgk/xxjj/index.htm', 'https://www.nfu.edu.cn/zjnf/tsnf/index.htm', 'http://www.gdpr.com/', 'https://www.nfu.edu.cn/dshyx/index.htm', 'https://www.nfu.edu.cn/rcpy/jxjy/index.htm', 'https://www.nfu.edu.cn/xxyw/e37b88d9a4fd49b89a59af13f09246db.htm', 'https://www.nfu.edu.cn/xxyw/fb7ac3185a354c3db858d063d2d46239.htm', 'https://www.nfu.edu.cn/xxyw/index4.htm', 'https://www.nfu.edu.cn/xxyw/53dd98b1eb6c4d7f8def44592e519c45.htm', 'https://www.nfu.edu.cn/jgsz/yxsz/index.htm', 'https://beian.miit.gov.cn/', 'https://www.nfu.edu.cn/jgsz/index.htm', 'http://edu.gd.gov.cn/', 'https://www.nfu.edu.cn/rczp/index.htm', 'http://en.nfu.edu.cn/', 'https://www.nfu.edu.cn/xxgk/nfdsj/index.htm', 'https://www.nfu.edu.cn/tzgg/index.htm', 'https://www.nfu.edu.cn/xxgk/xrld/index.htm', 'https://www.nfu.edu.cn/qzzggcdjd100zn/index.htm'}\n"
     ]
    }
   ],
   "source": [
    "# Every link on the fetched page, resolved to an absolute URL.\n",
    "# The result is a set with no stable order, so print it sorted, one URL per line.\n",
    "all_absolute_links = r.html.absolute_links\n",
    "print(*sorted(all_absolute_links), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4e68194e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2023-09-12\n",
      "厚德和合，传承创新！中医药健康学院2023级新生开学典礼暨首届...\n",
      "{'https://www.nfu.edu.cn/xxyw/9d85c517440d4128a0e1a7bc1a4a9f1b.htm'}\n",
      "2023-09-11\n",
      "新学期，以一场研讨会拉开序幕！\n",
      "{'https://www.nfu.edu.cn/xxyw/92c408c6529d4d26a723d841d6485a1e.htm'}\n",
      "2023-09-06\n",
      "我校开展2023年秋季学期教学检查工作\n",
      "{'https://www.nfu.edu.cn/xxyw/3bc00a4927fc4398a5bd552a7fb2ae0e.htm'}\n",
      "2023-09-10\n",
      "躬耕教坛，强国有我——我校开展庆祝2023年教师节暨师德建设主题教育月系列活...\n",
      "{'https://www.nfu.edu.cn/xxyw/53dd98b1eb6c4d7f8def44592e519c45.htm'}\n",
      "2023-09-10\n",
      "来自校长、书记的节日问候\n",
      "{'https://www.nfu.edu.cn/xxyw/d2163668db974ea596874c63d1bb7776.htm'}\n",
      "2023-09-06\n",
      "从化区副区长、区公安分局局长陈航一行莅临我校开展食品安全“两个责任”包保督导\n",
      "{'https://www.nfu.edu.cn/xxyw/df632618d31b40798bf71b8ea2d103ca.htm'}\n",
      "2023-09-06\n",
      "首批上线！我校4门课程登陆全国高校课程思政教学资源服务平台\n",
      "{'https://www.nfu.edu.cn/xxyw/4f75495fe0e94d378fbabb94fabba1bd.htm'}\n",
      "2023-09-02\n",
      "欢迎！南方学院首批新生已就位！\n",
      "{'https://www.nfu.edu.cn/xxyw/64d88d805cb24e29818796b5ed03bd84.htm'}\n",
      "2023-08-26\n",
      "我校教师获“省长杯”工业设计大赛二等奖\n",
      "{'https://www.nfu.edu.cn/xxyw/f2bf2742d7de4ce09be7082fef186869.htm'}\n",
      "2023-08-19\n",
      "我们是，全国总冠军！\n",
      "{'https://www.nfu.edu.cn/xxyw/a0b30b72b462428cb239e90fe175a9ce.htm'}\n",
      "2023-08-18\n",
      "全国二等奖+3！\n",
      "{'https://www.nfu.edu.cn/xxyw/de6768f996eb44f4ad0c9bb169903e98.htm'}\n",
      "2023-08-15\n",
      "我校获邀参加中国抗癌协会CACA指南进校园工作推动会（广东站）\n",
      "{'https://www.nfu.edu.cn/xxyw/96267573495b4f66a490f2ba9ec3ea0c.htm'}\n",
      "2023-08-11\n",
      "学校召开学生宿舍改造工程进度协调会并检查施工现场\n",
      "{'https://www.nfu.edu.cn/xxyw/e37b88d9a4fd49b89a59af13f09246db.htm'}\n",
      "2023-08-11\n",
      "我校获800万省级民办教育发展专项资金支持\n",
      "{'https://www.nfu.edu.cn/xxyw/7148917faf2f437abeaf225e966dd502.htm'}\n",
      "2023-08-04\n",
      "我校斩获多项省教育教学优秀案例\n",
      "{'https://www.nfu.edu.cn/xxyw/d6f0a1cedeac423fb6eda482d72c56ce.htm'}\n",
      "2023-07-21\n",
      "校领导带队检查指导校园安全工作并走访慰问留校师生\n",
      "{'https://www.nfu.edu.cn/xxyw/0803663924a442588ee67bce4b9249be.htm'}\n",
      "2023-07-19\n",
      "暑期获奖连连！给南院师生点赞！\n",
      "{'https://www.nfu.edu.cn/xxyw/2ca11309128e42a58ed4f1c42be754d3.htm'}\n",
      "2023-07-18\n",
      "我校受邀在全国“高等教育数字变革与院校研究专业化”学术研讨会上发言\n",
      "{'https://www.nfu.edu.cn/xxyw/c6c2ea21061e41eda1f5f27a8baf6cc2.htm'}\n",
      "2023-07-15\n",
      "国标项目——我校召开国家标准委广东社会组织能力建设标准化试点项目启动会\n",
      "{'https://www.nfu.edu.cn/xxyw/fb7ac3185a354c3db858d063d2d46239.htm'}\n",
      "2023-07-15\n",
      "我校受邀在广东省教育厅举办的2023年全省学籍学历管理工作培训会作主题报告\n",
      "{'https://www.nfu.edu.cn/xxyw/023ce37514814a628c45433a3924628b.htm'}\n"
     ]
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "\n",
    "# Start a fresh session and load the listing page.\n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://www.nfu.edu.cn/xxyw/index.htm\")\n",
    "\n",
    "# CSS selector: each <li> directly under <ul class=\"list-ul\"> is one news entry.\n",
    "news = r.html.find('ul.list-ul > li')\n",
    "\n",
    "for item in news:\n",
    "    # First the entry's visible text, then the set of absolute URLs found inside it.\n",
    "    print(item.text, item.absolute_links, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d8feedc6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "iPhone 15来了！苹果拿出全球首颗3nm芯片，支持Type-c，顶配13999元\n",
      "{'https://news.cnblogs.com/n/750242/'}\n",
      "中国互联网第一次“人才通胀”始末\n",
      "{'https://news.cnblogs.com/n/750238/'}\n",
      "老黄给H100打鸡血：英伟达推出大模型加速包，Llama2推理速度翻倍\n",
      "{'https://news.cnblogs.com/n/750171/'}\n",
      "比Python快6.8万倍，新语言Mojo首次开放下载，LLVM之父打造\n",
      "{'https://news.cnblogs.com/n/750092/'}\n",
      "蔡崇信、吴泳铭正式履新董事长、CEO，张勇荣获“功勋阿里人”\n",
      "{'https://news.cnblogs.com/n/750088/'}\n",
      "中国团队重要突破：首次在猪体内“种”出人类肾脏\n",
      "{'https://news.cnblogs.com/n/750080/'}\n",
      "谷歌25岁了：AI能化解中年危机吗？\n",
      "{'https://news.cnblogs.com/n/750000/'}\n",
      "腾讯称混元大模型中文能力超过GPT3.5 我们一起看看\n",
      "{'https://news.cnblogs.com/n/749958/'}\n",
      "有史以来最遥远星系磁场探测到\n",
      "{'https://news.cnblogs.com/n/749924/'}\n",
      "央视：华为新手机，拆解出了什么？拆出了中国高科技企业的里程碑\n",
      "{'https://news.cnblogs.com/n/749890/'}\n",
      "“大学教师体验送外卖”破圈背后的“是”与“非”\n",
      "{'https://news.cnblogs.com/n/749865/'}\n",
      "DIY大佬自制离子推进器火了，近300万网友围观：星际迷航就是这吧\n",
      "{'https://news.cnblogs.com/n/749842/'}\n",
      "烧假酒，可能是内燃机以后唯一的出路了\n",
      "{'https://news.cnblogs.com/n/749831/'}\n",
      "为什么说加密货币是史上最大的庞氏骗局\n",
      "{'https://news.cnblogs.com/n/749779/'}\n",
      "独立游戏调GPT遭Steam下架，开发者：我的存款和三年半时光都没了\n",
      "{'https://news.cnblogs.com/n/749740/'}\n",
      "AI“调香师”预测气味媲美人类\n",
      "{'https://news.cnblogs.com/n/749717/'}\n",
      "山东一大学教授送外卖，写出来一篇外卖小哥调研文章\n",
      "{'https://news.cnblogs.com/n/749676/'}\n",
      "AI无人机竞速击败人类冠军，Nature：将AlphaGo成果带到物理世界\n",
      "{'https://news.cnblogs.com/n/749606/'}\n"
     ]
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "\n",
    "# Start a fresh session and load the recommended-news page.\n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://news.cnblogs.com/n/recommend\")\n",
    "\n",
    "# CSS selector: the <a> directly inside each <h2 class=\"news_entry\">.\n",
    "news = r.html.find('h2.news_entry > a')\n",
    "\n",
    "for item in news:\n",
    "    # First the anchor text (the headline), then the set of absolute URLs it links to.\n",
    "    print(item.text, item.absolute_links, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4001719c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "iPhone 15来了！苹果拿出全球首颗3nm芯片，支持Type-c，顶配13999元\n",
      "{'https://news.cnblogs.com/n/750242/'}\n",
      "\n",
      "中国互联网第一次“人才通胀”始末\n",
      "{'https://news.cnblogs.com/n/750238/'}\n",
      "\n",
      "老黄给H100打鸡血：英伟达推出大模型加速包，Llama2推理速度翻倍\n",
      "{'https://news.cnblogs.com/n/750171/'}\n",
      "\n",
      "比Python快6.8万倍，新语言Mojo首次开放下载，LLVM之父打造\n",
      "{'https://news.cnblogs.com/n/750092/'}\n",
      "\n",
      "蔡崇信、吴泳铭正式履新董事长、CEO，张勇荣获“功勋阿里人”\n",
      "{'https://news.cnblogs.com/n/750088/'}\n",
      "\n",
      "中国团队重要突破：首次在猪体内“种”出人类肾脏\n",
      "{'https://news.cnblogs.com/n/750080/'}\n",
      "\n",
      "谷歌25岁了：AI能化解中年危机吗？\n",
      "{'https://news.cnblogs.com/n/750000/'}\n",
      "\n",
      "腾讯称混元大模型中文能力超过GPT3.5 我们一起看看\n",
      "{'https://news.cnblogs.com/n/749958/'}\n",
      "\n",
      "有史以来最遥远星系磁场探测到\n",
      "{'https://news.cnblogs.com/n/749924/'}\n",
      "\n",
      "央视：华为新手机，拆解出了什么？拆出了中国高科技企业的里程碑\n",
      "{'https://news.cnblogs.com/n/749890/'}\n",
      "\n",
      "“大学教师体验送外卖”破圈背后的“是”与“非”\n",
      "{'https://news.cnblogs.com/n/749865/'}\n",
      "\n",
      "DIY大佬自制离子推进器火了，近300万网友围观：星际迷航就是这吧\n",
      "{'https://news.cnblogs.com/n/749842/'}\n",
      "\n",
      "烧假酒，可能是内燃机以后唯一的出路了\n",
      "{'https://news.cnblogs.com/n/749831/'}\n",
      "\n",
      "为什么说加密货币是史上最大的庞氏骗局\n",
      "{'https://news.cnblogs.com/n/749779/'}\n",
      "\n",
      "独立游戏调GPT遭Steam下架，开发者：我的存款和三年半时光都没了\n",
      "{'https://news.cnblogs.com/n/749740/'}\n",
      "\n",
      "AI“调香师”预测气味媲美人类\n",
      "{'https://news.cnblogs.com/n/749717/'}\n",
      "\n",
      "山东一大学教授送外卖，写出来一篇外卖小哥调研文章\n",
      "{'https://news.cnblogs.com/n/749676/'}\n",
      "\n",
      "AI无人机竞速击败人类冠军，Nature：将AlphaGo成果带到物理世界\n",
      "{'https://news.cnblogs.com/n/749606/'}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): `news_links` is computed but never read; the loop below iterates\n",
    "# `news` from the previous cell. Confirm whether `news_links` was meant to be used.\n",
    "news_links = r.html.find('div.content')\n",
    "\n",
    "for new in news:\n",
    "    print(new.text)            # element text (the headline)\n",
    "    print(new.absolute_links)  # set of absolute URLs inside the element\n",
    "    print()                    # blank line between entries"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e81ed6b",
   "metadata": {},
   "source": [
    "# 项目三 requests_html获取页面图片"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "edb637f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Request the wallhaven front page over HTTPS and fail fast on an HTTP error.\n",
    "r = session.get('https://wallhaven.cc')\n",
    "r.raise_for_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b8e72df4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Response [200]>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Echo the response object; its repr shows the HTTP status code.\n",
    "r"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "722bc5da",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a4bbaec",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "f44c5548",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://th.wallhaven.cc/small/x6/x6p3y3.jpg\n",
      "x6p3y3.jpg\n",
      "https://th.wallhaven.cc/small/zy/zy5y1o.jpg\n",
      "zy5y1o.jpg\n",
      "https://th.wallhaven.cc/small/x6/x6pl9v.jpg\n",
      "x6pl9v.jpg\n",
      "https://th.wallhaven.cc/small/we/we628p.jpg\n",
      "we628p.jpg\n",
      "https://th.wallhaven.cc/small/ex/ex9gwo.jpg\n",
      "ex9gwo.jpg\n",
      "https://th.wallhaven.cc/small/1p/1pd1o9.jpg\n",
      "1pd1o9.jpg\n",
      "https://th.wallhaven.cc/small/m3/m3zjx1.jpg\n",
      "m3zjx1.jpg\n",
      "https://th.wallhaven.cc/small/1p/1ppld1.jpg\n",
      "1ppld1.jpg\n",
      "https://th.wallhaven.cc/small/yx/yxqzpd.jpg\n",
      "yxqzpd.jpg\n",
      "https://th.wallhaven.cc/small/kx/kx82d6.jpg\n",
      "kx82d6.jpg\n",
      "https://th.wallhaven.cc/small/l8/l83o92.jpg\n",
      "l83o92.jpg\n",
      "https://th.wallhaven.cc/small/1p/1p398w.jpg\n",
      "1p398w.jpg\n",
      "https://th.wallhaven.cc/small/7p/7p39gy.jpg\n",
      "7p39gy.jpg\n",
      "https://th.wallhaven.cc/small/qz/qzdqvr.jpg\n",
      "qzdqvr.jpg\n",
      "https://th.wallhaven.cc/small/zy/zyxvqy.jpg\n",
      "zyxvqy.jpg\n",
      "https://th.wallhaven.cc/small/kx/kx98xd.jpg\n",
      "kx98xd.jpg\n",
      "https://th.wallhaven.cc/small/zy/zygeko.jpg\n",
      "zygeko.jpg\n",
      "https://th.wallhaven.cc/small/kx/kx36mq.jpg\n",
      "kx36mq.jpg\n",
      "https://th.wallhaven.cc/small/m9/m9xyg8.jpg\n",
      "m9xyg8.jpg\n",
      "https://th.wallhaven.cc/small/o5/o59gvl.jpg\n",
      "o59gvl.jpg\n",
      "https://th.wallhaven.cc/small/28/28p95m.jpg\n",
      "28p95m.jpg\n",
      "https://th.wallhaven.cc/small/e7/e7jj6r.jpg\n",
      "e7jj6r.jpg\n",
      "https://th.wallhaven.cc/small/9m/9mjoy1.jpg\n",
      "9mjoy1.jpg\n",
      "https://th.wallhaven.cc/small/j3/j3m8y5.jpg\n",
      "j3m8y5.jpg\n"
     ]
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "# Folder that receives the downloaded thumbnails; created if it does not exist yet.\n",
    "IMG_DIR = Path('wallhaven_thumbs')\n",
    "IMG_DIR.mkdir(exist_ok=True)\n",
    "\n",
    "items_img = r.html.find('span.sm-thumb>a>img')\n",
    "\n",
    "for img in items_img:\n",
    "    # img_url is the image's download link.\n",
    "    img_url = img.attrs['src']\n",
    "    print(img_url)\n",
    "    # img_name is the name the image is stored under: the last path segment of the URL.\n",
    "    img_name = img_url.split('/')[-1]\n",
    "    print(img_name)\n",
    "    # 1. Request the download link to get the image; stop on an HTTP error.\n",
    "    img_response = requests.get(img_url)\n",
    "    img_response.raise_for_status()\n",
    "    # 2. Save the image by writing the raw response bytes to the target path.\n",
    "    (IMG_DIR / img_name).write_bytes(img_response.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43ea35f7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
